################
#Molly Roberts and Megan Westrum's Replication File
#########

#Install data

setwd("/Users/mollyroberts/Winter2010/Gov2001/Final Project")
library(foreign)
fdi <- read.dta("MilnerButhe2.dta")
library(lmtest)
library(kinship)
library(Formula)
library(plm)
library(apsrtable)
library(tseries)
#Necessary functions

clx <- function(fm, time, cluster){
library(sandwich)
library(lmtest)
T <- length(unique(time))
N <- length(cluster)
dfc <- (N)/(N - T)
u <- apply(estfun(fm),2,
function(x) tapply(x, cluster, sum))
vcovCL <- dfc*sandwich(fm, meat=crossprod(u)/N)
coeftest(fm, vcovCL)}

clus <- function(fm, cluster){
library(sandwich)
library(lmtest)
N <- length(cluster)
u <- apply(estfun(fm),2,
function(x) tapply(x, cluster, sum))
vcovCL <- sandwich(fm, meat=crossprod(u)/N)
coeftest(fm, vcovCL) }

####
#Variable Set-up
#####

vars.4 <- c("fdi_inflow_unctad_gdp", "lag_pta_cuml", "lag_gattwto", "lag_trade_pgdp", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr","date", "country", "fdi_detrended", "lag_pta_cuml_det", "ctylabel", "region_wb")
fdi4 <- na.omit(fdi[,vars.4])
fdi4$date2 <- fdi4$date^2
fdi4$date3 <- fdi4$date^3

#####
#Plot of Trends, Figure 1
#####
meanfdi <- tapply(fdi4$fdi_inflow_unctad_gdp, fdi4$date, mean)
meanpta <- tapply(fdi4$lag_pta_cuml, fdi4$date, mean)
names(meanfdi) <- names(meanpta) <-  seq(1970, 1999)
plot(names(meanfdi), meanfdi, xlab="Date", ylab="FDI Inflows as % of GDP, Cumulative PTAs", type="l")
lines(names(meanpta), meanpta, col="red")
text(1990, 3, "Mean PTAs", col="red")
text(1988, 1.5, "Mean FDI Inflows")

#####
#Plot of Milner Buthe Detrending, Figure 2
#####
meanfdi <- tapply(fdi4$fdi_detrended, fdi4$date, mean)
meanpta <- tapply(fdi4$lag_pta_cuml_det, fdi4$date, mean)
names(meanfdi) <- names(meanpta) <-  seq(1970, 1999)
plot(names(meanfdi), meanfdi, xlab="Date", ylab="Detrended FDI Inflows as % of GDP, Cumulative PTAs", type="l")
lines(names(meanpta), meanpta, col="red")
text(1975, 0, "Mean PTAs", col="red")
text(1975, .75, "Mean FDI Inflows")

#Detrend Variables Quadratically

lm1 <- lm(fdi4$fdi_inflow_unctad_gdp ~ fdi4$date + fdi4$date2 + fdi4$date3 + factor(fdi4$country))
fdi4$fdi_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_pta_cuml ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pta_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gattwto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gattwto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_bits_cuml_restricted ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$bits_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_polconiii_2002 ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pol_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_pop ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pop_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_gdp_pc_95d ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdppc_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gdp_gr ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdpgr_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gatt ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gatt_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_wto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$wto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals

#####
#Plot of Our Detrending, Figure 3
#####
meanfdi <- tapply(fdi4$fdi_det2, fdi4$date, mean)
meanpta <- tapply(fdi4$pta_det2, fdi4$date, mean)
names(meanfdi) <- names(meanpta) <-  seq(1970, 1999)
plot(names(meanfdi), meanfdi, xlab="Date", ylab="Detrended FDI Inflows as % of GDP, Cumulative PTAs", type="l")
lines(names(meanpta), meanpta, col="red")
text(1987, 0.3, "Mean PTAs", col="red")
text(1977, 0.4, "Mean FDI")

####
#Replication of Milner and Buthe Table with Variables Quadratically Detrended
###
#Model 4
se4 <- lm(fdi_det2 ~ pta_det2 + gattwto_det2 + bits_det2 + pol_det2 + lag_polinstability_wvo + pop_det2 + gdppc_det2 + gdpgr_det2 + factor(country), data=fdi4)
mod4 <- clx(se4, fdi4$date, fdi4$country)

###
Model5
###
vars.4 <- c("fdi_inflow_unctad_gdp", "lag_pta_cuml", "lag_gattwto", "lag_trade_pgdp", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr", "lag_ptareg_v2", "lag_probmean","date", "country", "fdi_detrended", "lag_pta_cuml_det")
fdi4 <- na.omit(fdi[,vars.4])
fdi4$date2 <- fdi4$date^2
fdi4$date3 <- fdi4$date^3

#Retrend variables again using "Retrending" section above
lm1 <- lm(fdi4$fdi_inflow_unctad_gdp ~ fdi4$date + fdi4$date2 + fdi4$date3 + factor(fdi4$country))
fdi4$fdi_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_pta_cuml ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pta_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gattwto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gattwto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_bits_cuml_restricted ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$bits_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_polconiii_2002 ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pol_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_pop ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pop_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_gdp_pc_95d ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdppc_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gdp_gr ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdpgr_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gatt ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gatt_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_wto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$wto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals


se5 <- ivreg(fdi_det2 ~ pta_det2 + gattwto_det2 + bits_det2 + pol_det2 + lag_polinstability_wvo + pop_det2 + gdppc_det2 + gdpgr_det2 + factor(country), instruments=~+ gattwto_det2 + bits_det2 + pol_det2 + lag_polinstability_wvo + pop_det2 + gdppc_det2 + gdpgr_det2 + factor(country) + lag_ptareg_v2 + lag_probmean, data=fdi4)
mod5 <- clus(se5, fdi4$country)

####
Model 8
####
vars.4 <- c("fdi_inflow_unctad_gdp", "lag_pta_cuml", "lag_gattwto", "lag_trade_pgdp", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr","date", "country", "fdi_detrended", "lag_pta_cuml_det", "lag_wto", "lag_gatt")
fdi4 <- na.omit(fdi[,vars.4])
fdi4$date2 <- fdi4$date^2
fdi4$date3 <- fdi4$date^3

#Redetrend Variables using code in "Redetrend" section
lm1 <- lm(fdi4$fdi_inflow_unctad_gdp ~ fdi4$date + fdi4$date2 + fdi4$date3 + factor(fdi4$country))
fdi4$fdi_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_pta_cuml ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pta_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gattwto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gattwto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_bits_cuml_restricted ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$bits_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_polconiii_2002 ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pol_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_pop ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pop_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_gdp_pc_95d ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdppc_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gdp_gr ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdpgr_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gatt ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gatt_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_wto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$wto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals

se8 <- lm(fdi_det2 ~ pta_det2 + gatt_det2 + wto_det2 + bits_det2 + pol_det2 + lag_polinstability_wvo + pop_det2 + gdppc_det2 + gdpgr_det2 + factor(country), data=fdi4)

mod8 <-clx(se8, fdi4$date, fdi4$country)


####
Model 9
#####
vars.4 <- c("fdi_inflow_unctad_gdp", "lag_pta_cuml", "lag_gattwto", "lag_trade_pgdp", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr","date", "country", "fdi_detrended", "lag_pta_cuml_det", "lag_wto", "lag_gatt")
fdi4 <- na.omit(fdi[,vars.4])
fdi4$date2 <- fdi4$date^2
fdi4$date3 <- fdi4$date^3

#Redetrend Variables
lm1 <- lm(fdi4$fdi_inflow_unctad_gdp ~ fdi4$date + fdi4$date2 + fdi4$date3 + factor(fdi4$country))
fdi4$fdi_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_pta_cuml ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pta_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gattwto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gattwto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_bits_cuml_restricted ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$bits_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_polconiii_2002 ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pol_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_pop ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pop_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_gdp_pc_95d ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdppc_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gdp_gr ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdpgr_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gatt ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gatt_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_wto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$wto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals

se9 <- lm(fdi_det2 ~ pta_det2 + trade_det2 + gatt_det2 + wto_det2 + bits_det2 + pol_det2 + lag_polinstability_wvo + pop_det2 + gdppc_det2 + gdpgr_det2 + factor(country), data=fdi4)

mod9 <- clx(se9, fdi4$date, fdi4$country)

se4$se <- mod4[,2]
se8$se <- mod8[,2]
se9$se <- mod9[,2]
apsrtable(se4, se8, se9, digits=4)

###########
#Algeria and Venezuela Trends
########

#Data set-up
vars.4 <- c("fdi_inflow_unctad_gdp", "lag_pta_cuml", "lag_gattwto", "lag_trade_pgdp", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr","date", "country", "fdi_detrended", "lag_pta_cuml_det", "ctylabel", "region_wb")
fdi4 <- na.omit(fdi[,vars.4])
fdi4$date2 <- fdi4$date^2
fdi4$date3 <- fdi4$date^3



lm1 <- lm(fdi4$fdi_inflow_unctad_gdp ~ fdi4$date + fdi4$date2 + fdi4$date3 + factor(fdi4$country))
fdi4$fdi_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_pta_cuml ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pta_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gattwto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gattwto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_bits_cuml_restricted ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$bits_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_polconiii_2002 ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pol_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_pop ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$pop_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_ln_gdp_pc_95d ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdppc_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gdp_gr ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gdpgr_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_gatt ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$gatt_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_wto ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$wto_det2 <- lm1$residuals
lm1 <- lm(fdi4$lag_trade_pgdp ~ fdi4$date + fdi4$date2 + factor(fdi4$country))
fdi4$trade_det2 <- lm1$residuals

fdiv <- subset(fdi4, fdi4$ctylabel=="Venezuela")
fdia <- subset(fdi4, fdi4$ctylabel=="Algeria")

########
#Figure 4: No Detrending
#######
par(mfrow=c(2,2))
ven.lo <- loess.smooth(fdiv$date, fdiv$fdi_inflow_unctad_gdp)
plot(ven.lo, col="red", type="l", main="Venezuela FDI", xlab="Year", ylab="FDI", ylim=c(-1,4))
points(fdiv$date, fdiv$fdi_inflow_unctad_gdp)
ven.lo2 <- loess.smooth(fdiv$date, fdiv$lag_pta_cuml)
plot(ven.lo2, type="l", main="Venezuela PTAs", xlab="Year", ylab="Cumulative PTAs", ylim=c(-1,8), col="red")
points(fdiv$date, fdiv$lag_pta_cuml)
ven.lo <- loess.smooth(fdia$date, fdia$fdi_inflow_unctad_gdp)
plot(ven.lo, col="red", type="l", main="Algeria FDI", xlab="Year", ylab="FDI", ylim=c(-1,4))
points(fdia$date, fdia$fdi_inflow_unctad_gdp)
ven.lo2 <- loess.smooth(fdia$date, fdia$lag_pta_cuml)
plot(ven.lo2, type="l", main="Algeria PTAs", xlab="Year", ylab="Cumulative PTAs", ylim=c(-1,8), col="red")
points(fdia$date, fdia$lag_pta_cuml)

#######
#Figure 5: Milner/Buthe Detrending
#######
par(mfrow=c(2,2))
ven.lo <- loess.smooth(fdiv$date, fdiv$fdi_detrended)
plot(ven.lo, col="red", type="l",main="Venezuela FDI Detrended Linearly", xlab="Year", ylab="FDI Detrended Linearly",ylim=c(-1,4))
points(fdiv$date, fdiv$fdi_detrended)
ven.lo2 <- loess.smooth(fdiv$date, fdiv$lag_pta_cuml_det)
plot(ven.lo2, type="l", main="Venezuela PTAs Detrended Linearly", xlab="Year", ylab="PTAs Detrended Linearly", ylim=c(-1,8), col="red")
points(fdiv$date, fdiv$lag_pta_cuml_det)

ven.lo <- loess.smooth(fdia$date, fdia$fdi_detrended)
plot(ven.lo, col="red", type="l",main="Algeria FDI Detrended Linearly", xlab="Year", ylab="FDI Detrended Linearly",ylim=c(-1,4))
points(fdia$date, fdia$fdi_detrended)
ven.lo2 <- loess.smooth(fdi4$date, fdi4$lag_pta_cuml_det)
plot(ven.lo2, type="l", main="Algeria PTAs Detrended Linearly", xlab="Year", ylab="PTAs Detrended Linearly", ylim=c(-1,8), col="red")
points(fdia$date, fdia$lag_pta_cuml_det)

#######
#Figure 6: Quadratically Detrending
######
par(mfrow=c(2,2))
ven.lo <- loess.smooth(fdiv$date, fdiv$fdi_det2)
plot(ven.lo, col="red", type="l", main="Venezuela FDI Detrended Quadratically", xlab="Year", ylab="FDI Detrended Quadratically",ylim=c(-1,4))
points(fdiv$date, fdiv$fdi_det2)
ven.lo2 <- loess.smooth(fdiv$date, fdiv$pta_det2)
plot(ven.lo2, type='l', main="Venezuela PTAs Detrended Quadratically", xlab="Year", ylab="PTAs Detrended Quadratically", ylim=c(-1,8), col="red")
points(fdiv$date, fdiv$pta_det2)


ven.lo <- loess.smooth(fdia$date, fdia$fdi_det2)
plot(ven.lo, col="red", type="l", main="Algeria FDI Detrended Quadratically", xlab="Year", ylab="FDI Detrended Quadratically",ylim=c(-1,4))
points(fdia$date, fdia$fdi_det2)
ven.lo2 <- loess.smooth(fdia$date, fdia$pta_det2)
plot(ven.lo2, type='l', main="Algeria PTAs Detrended Quadratically", xlab="Year", ylab="PTAs Detrended Quadratically", ylim=c(-1,8), col="red")
points(fdia$date, fdia$pta_det2)

#####
#ADF Tests: Code for Each Detrending Strategy
####

###
#No Detrending Strategy
###
uroutcome <- NULL
country <- unique(fdi4$country)
for (i in 1:length(unique(fdi4$country))){
	fdiv <- fdi4$fdi_inflow_unctad_gdp[fdi4$country==country[i]]
	if(length(fdiv)>6){
	test <- adf.test(as.vector(fdiv), alternative="stationary")
	uroutcome[i] <- test$p.value
	}
	else
	uroutcome[i]<- NA
	}

u2 <- na.omit(uroutcome)
length(u2[u2<.1])/length(u2)

####
#Linear Detrending Strategy
####

uroutcome <- NULL
country <- unique(fdi4$country)
for (i in 1:length(unique(fdi4$country))){
	fdiv <- fdi4$fdi_detrended[fdi4$country==country[i]]
	if(length(fdiv)>6){
	test <- adf.test(as.vector(fdiv), alternative="stationary")
	uroutcome[i] <- test$p.value
	}
	else
	uroutcome[i]<- NA
	}
	
u2 <- na.omit(uroutcome)
length(u2[u2<.1])/length(u2)

####
#Quadratic Detrending Strategy
#####
uroutcome <- NULL
country <- unique(fdi4$country)
for (i in 1:length(unique(fdi4$country))){
	fdiv <- fdi4$fdi_det2[fdi4$country==country[i]]
	if(length(fdiv)>6){
	test <- adf.test(as.vector(fdiv), alternative="stationary")
	uroutcome[i] <- test$p.value
	}
	else
	uroutcome[i]<- NA
	}
	
u2 <- na.omit(uroutcome)
length(u2[u2<.1])/length(u2)


########
#Matching
########

#Coarsened Exact Matching 

setwd("/Users/mollyroberts/Winter2010/Gov2001/Final Project")
library(foreign)
fdi <- read.dta("MilnerButhe2.dta")
library(lmtest)
library(kinship)
library(Formula)
library(plm)
library(xtable)
library(cem)
#First take difference in PTAs
country <- unique(fdi$country)
fdi <- fdi[order(fdi$date),]
#Create lagged y
for (i in 1:length(unique(fdi$country))){
	pta1 <- fdi$lag_pta_cuml[fdi$country==country[i]]
	n <- length(pta1)
	pta2 <- c(NA,pta1[1:(n-1)])
	fdi$pta[fdi$country==country[i]] <- pta1-pta2
	}
for (i in 1:length(unique(fdi$country))){
	pta1 <- fdi$lag_bits_cuml_restricted[fdi$country==country[i]]
	n <- length(pta1)
	pta2 <- c(NA,pta1[1:(n-1)])
	fdi$bits[fdi$country==country[i]] <- pta1-pta2
	}

fdi$pta1 <- ifelse(fdi$pta>0, 1,0)

#1994 Analysis

vars.4 <- c("fdi_detrended", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr", "date", "country", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_gattwto", "lag_pta_cuml", 
"fdi_inflow_unctad_gdp", "pta", "lag_gatt", "lag_wto", "ctylabel", "bits", "pta1", "ln_gdp_95d")
fdi4 <- na.omit(fdi[,vars.4])
fdi4 <- subset(fdi4, fdi4$date==1994)
fdi2 <- na.omit(fdi[,vars.4])
country <- unique(fdi4$country)
fdi$trend <- NULL
fdi$trendsq <- NULL
for(i in 1:length(country)){
data <- subset(fdi2, fdi2$country==country[i])
datestart <- 1971
dateend <- unique(fdi4$date)
data <- subset(data, data$date>datestart & data$date<dateend)
data$datesq <- data$date^2
if(length(data$date)>4){
lm.1 <- lm(fdi_inflow_unctad_gdp~date + datesq, data=data)
p.value <- xtable(lm.1)[2,4]
fdi4$trend[i] <- lm.1$coefficients[2]
fdi4$trendsq[i] <- lm.1$coefficients[3]
#fdi4$trend[i] <- ifelse(p.value<.1,lm.1$coefficients[2],0)
}
else{
fdi4$trend[i] <- 0
}
}


#Match on number of PTAs in previous years

fdi$ptatrend <- NULL
for(i in 1:length(country)){
datestart <- unique(fdi4$date)-1
dateend <- unique(fdi4$date)
data <- subset(fdi2, fdi2$country==country[i])
data <- subset(data, data$date==datestart)
if(length(data$date)>0){
fdi4$ptatrend[i] <- data$lag_pta_cuml
}
else{
fdi4$ptatrend[i] <- 0
}
}

fdi$ptatrend <- NULL
for(i in 1:length(country)){
data <- subset(fdi2, fdi2$country==country[i])
datestart <- 1971
dateend <- unique(fdi4$date)
data <- subset(data, data$date>datestart & data$date<dateend)
data$datesq <- data$date^2
if(length(data$date)>4){
lm.1 <- lm(pta~date, data=data)
p.value <- xtable(lm.1)[2,4]
fdi4$ptatrend[i] <- lm.1$coefficients[2]
#fdi4$trend[i] <- ifelse(p.value<.1,lm.1$coefficients[2],0)
}
else{
fdi4$ptatrend[i] <- 0
}
}

#fdi4$ptatrend <- ifelse(fdi4$ptatrend>0,1,0)
tr <- which(fdi4$pta1==1)
ct <- which(fdi4$pta1==0)
ntr <- length(tr)
nct <- length(ct)
mean(fdi4$fdi_inflow_unctad_gdp[tr])-mean(fdi4$fdi_inflow_unctad_gdp[ct])

#Analysis with no trend matching
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits")
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

#Analysis matching on time trend
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits", "trend")
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5, trend=4), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

#Analysis matching on time and timesq
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits", "trend", "trendsq")
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5, trend=4, trendsq=3), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

#Analysis matching on PTA trend
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits", "trend", "ptatrend")
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5, trend=4, ptatrend=3), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

#1995 Analysis

vars.4 <- c("fdi_detrended", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr", "date", "country", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_gattwto", "lag_pta_cuml", 
"fdi_inflow_unctad_gdp", "pta", "lag_gatt", "lag_wto", "ctylabel", "bits", "pta1", "ln_gdp_95d")
fdi4 <- na.omit(fdi[,vars.4])
fdi4 <- subset(fdi4, fdi4$date==1995)
fdi2 <- na.omit(fdi[,vars.4])
country <- unique(fdi4$country)
fdi$trend <- NULL
fdi$trendsq <- NULL
for(i in 1:length(country)){
data <- subset(fdi2, fdi2$country==country[i])
datestart <- 1971
dateend <- unique(fdi4$date)
data <- subset(data, data$date>datestart & data$date<dateend)
data$datesq <- data$date^2
if(length(data$date)>4){
lm.1 <- lm(fdi_inflow_unctad_gdp~date + datesq, data=data)
p.value <- xtable(lm.1)[2,4]
fdi4$trend[i] <- lm.1$coefficients[2]
fdi4$trendsq[i] <- lm.1$coefficients[3]
#fdi4$trend[i] <- ifelse(p.value<.1,lm.1$coefficients[2],0)
}
else{
fdi4$trend[i] <- 0
}
}


#Match on number of PTAs in previous years

fdi$ptatrend <- NULL
for(i in 1:length(country)){
datestart <- unique(fdi4$date)-1
dateend <- unique(fdi4$date)
data <- subset(fdi2, fdi2$country==country[i])
data <- subset(data, data$date==datestart)
if(length(data$date)>0){
fdi4$ptatrend[i] <- data$lag_pta_cuml
}
else{
fdi4$ptatrend[i] <- 0
}
}

fdi$ptatrend <- NULL
for(i in 1:length(country)){
data <- subset(fdi2, fdi2$country==country[i])
datestart <- 1971
dateend <- unique(fdi4$date)
data <- subset(data, data$date>datestart & data$date<dateend)
data$datesq <- data$date^2
if(length(data$date)>4){
lm.1 <- lm(pta~date, data=data)
p.value <- xtable(lm.1)[2,4]
fdi4$ptatrend[i] <- lm.1$coefficients[2]
#fdi4$trend[i] <- ifelse(p.value<.1,lm.1$coefficients[2],0)
}
else{
fdi4$ptatrend[i] <- 0
}
}

#fdi4$ptatrend <- ifelse(fdi4$ptatrend>0,1,0)
tr <- which(fdi4$pta1==1)
ct <- which(fdi4$pta1==0)
ntr <- length(tr)
nct <- length(ct)
mean(fdi4$fdi_inflow_unctad_gdp[tr])-mean(fdi4$fdi_inflow_unctad_gdp[ct])

#Analysis with no trend matching
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits")
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

#Analysis matching on time trend
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits", "trend")
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5, trend=4), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

#Analysis matching on time and timesq
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits", "trend", "trendsq")
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5, trend=4, trendsq=3), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

#Analysis matching on PTA trend
vars <- c("lag_gattwto", "pta1", "fdi_inflow_unctad_gdp", "lag_ln_gdp_pc_95d", "bits", "trend", "ptatrend"s)
fdi5 <- na.omit(fdi4[,vars])
imbalance(group=fdi5$pta1, data=fdi5, drop="fdi_inflow_unctad_gdp")
fdi5$lag_gattwto <- as.factor(fdi5$lag_gattwto)
fdi5$region_wb <- as.factor(fdi5$region_wb)
fdi5$bits <- as.factor(fdi5$bits)
gatt.cut <- list(c("0"), c("1"))
bits.cut <- list(c("0"), c("1","2","3"),c("4","5","6","7","8","9","10","11"))
#region.cut <- list(c("0"),c("1"),c("2"),c("3"),c("4"),c("5"),c("6"),c("7"))
mat <- cem(treatment="pta1", data=fdi5, drop="fdi_inflow_unctad_gdp", cutpoint=list (lag_ln_gdp_pc_95d=5, trend=4, ptatrend=3), grouping=list(lag_gattwto=gatt.cut, bits=bits.cut))
at.1 <- att(mat, fdi_inflow_unctad_gdp ~ pta1  + lag_gattwto + bits + lag_ln_pop + lag_polconiii_2002 + lag_gdp_gr + lag_polinstability_wvo + lag_ln_gdp_pc_95d, data=fdi4)
at.1$TE
at.1
mat

mat$mstrata
group <- mat$mstrataID
fdi4$ctylabel[mat$mstrata==group[1]]
fdi4$ctylabel[mat$mstrata==group[1] & fdi4$pta>0]



########
#Regression Discontinuity Analysis
#######

#Set up
setwd("/Users/mollyroberts/Winter2010/Gov2001/Final Project")
library(foreign)
fdi <- read.dta("MilnerButhe2.dta")
library(lmtest)
library(kinship)
library(Formula)
library(plm)
library(Design)
library(strucchange)
library(AER)
clus <- function(fm, cluster){
library(sandwich)
library(lmtest)
N <- length(cluster)
u <- apply(estfun(fm),2,
function(x) tapply(x, cluster, sum))
vcovCL <- sandwich(fm, meat=crossprod(u)/N)
coeftest(fm, vcovCL) }

#First take difference in PTAs
country <- unique(fdi$country)
fdi <- fdi[order(fdi$date),]
#Create lagged y
for (i in 1:length(unique(fdi$country))){
	pta1 <- fdi$lag_pta_cuml[fdi$country==country[i]]
	n <- length(pta1)
	pta2 <- c(NA,pta1[1:(n-1)])
	fdi$pta[fdi$country==country[i]] <- pta1-pta2
	}
for (i in 1:length(unique(fdi$country))){
	pta1 <- fdi$lag_bits_cuml_restricted[fdi$country==country[i]]
	n <- length(pta1)
	pta2 <- c(NA,pta1[1:(n-1)])
	fdi$bits[fdi$country==country[i]] <- pta1-pta2
	}
fdi$pta1 <- ifelse(fdi$pta>0, 1,0)

#All Countries, Date after 1995 as the instrument

vars.4 <- c("fdi_detrended", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr", "date", "country", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_gattwto", "lag_pta_cuml", 
"fdi_inflow_unctad_gdp", "pta", "lag_gatt", "lag_wto", "ctylabel", "bits", "pta1")
fdi4 <- na.omit(fdi[,vars.4])
fdi4 <- subset(fdi4, fdi4$date>1989)
#Regions
64 countries signed trade agreements in 1994
47 countries less than or zero in 1994
#graphing the probability
#First discontinuity
#fdi4$treat <- ifelse(fdi4$date>1993, 1,0)
#Second discontinuity
#fdi4$treat <- ifelse(fdi4$lag_wto==1,1,0)
fdi4$treat <- ifelse(fdi4$date>1995,1,0)

total <- tapply(fdi4$country[fdi4$pta>0], fdi4$date[fdi4$pta>0], function (x) length(unique(x)))
#total2 <- c(total[1:2], 0, total[3], 0, total[4:7],0,total[8:14],0, total[15:26])
#prob <- total2/length(unique(fdi4$country))
prob <- total/length(unique(fdi4$country))
totalfdi <- tapply(fdi4$fdi_inflow_unctad_gdp, fdi4$date, mean)


#Results
se5 <- ivreg(fdi_inflow_unctad_gdp ~ pta  + lag_gattwto +bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr  + date +factor(country), instruments = ~ +treat  + bits + lag_gattwto +lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr + date + factor(country), data=fdi4)
rd1 <- clus(se5, fdi4$country)

stage11 <- lm(pta ~ treat + lag_gattwto + bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr + date + factor(country), data=fdi4)
fdi4$fit1 <- stage11$fitted.values
stage22 <- lm(fdi_inflow_unctad_gdp ~ fit1+ lag_gattwto + bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr  + date +factor(country), data=fdi4)

#Plot
tx <- which(fdi4$pta>0) #& fdi4$date>1994)
fdi4$plotdate = fdi4$date + rnorm(length(fdi4$date), 0,.25)
plot(fdi4$fdi_inflow_unctad_gdp~ fdi4$plotdate, col="white", ylim=c(0,4), xlim=c(1994,1998), xlab="Year", ylab="FDI Inflow", axes=FALSE)
abline(v=1996, lty=2, lwd=2, col="darkgrey")
#points(fdi4$plotdate[-tx], .25*fdi4$fdi_inflow_unctad_gdp[-tx], col="darkgrey")
points(fdi4$plotdate[tx], .005*fdi4$fdi_inflow_unctad_gdp[tx], col="red", pch=4)
ticks <- c(1994, 1996, 1998)
ticks2 <- c(0,1,2)
axis(2)
axis(1, at=ticks, labels=c("1993","WTO", "1997"))
axis(4,at=ticks2, labels=c("0",".5","1"))

vars.5 <- c("pta", "lag_gattwto", "bits", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_ln_pop", 
"lag_ln_gdp_pc_95d", "lag_gdp_gr", "date")
fdi5 <- na.omit(fdi4[,vars.5])
levls <- apply(fdi5,2,median)
l94 <- c(1,levls[1:9], 1994)
l95 <- c(1,levls[1:3],1995,levls[5:10])
l96 <- c(1,levls[1:3],1996,levls[5:10])
l97 <- c(1,levls[1:3],1997,levls[5:10])
l98 <- c(1,levls[1:3],1998,levls[5:10])
se5.n <- lm(fdi_inflow_unctad_gdp ~ pta + lag_gattwto  + bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr  + date +factor(country), fdi4[fdi4$date<1996,])

#fdi4$fit <- NULL
#fdi4[fdi4$date<1996,]$fit <- se5.n$fit
#lm1 <- lm(fit ~ date, data=fdi4[fdi4$date<1996,])
#pred <- predict(lm1, fdi4[fdi4$date<1997,], interval="confidence")

#fit <- tapply(pred[,1], fdi4$date[fdi4$date<1997], median)
#lwr <- tapply(pred[,2], fdi4$date[fdi4$date<1997], median)
#upr <- tapply(pred[,3], fdi4$date[fdi4$date<1997], median)

#lines(1990:1996, fit, col="orange")
#lines(1990:1996, lwr, col="blue")
#lines(1990:1996, upr, col="blue")
pred <- predict(se5.n, fdi4[fdi4$date<1997& fdi4$country!=911,])

meanpred <- tapply(pred, fdi4$date[fdi4$date<1997& fdi4$country!=911] , mean)
sd <- tapply(pred, fdi4$date[fdi4$date<1997& fdi4$country!=911] , sd)

lines(1990:1996, meanpred, col="orange")

pred <- predict(se5, fdi4)
meanpred <- tapply(pred, fdi4$date, mean)
lines(1996:1999, meanpred[7:10], col="orange")
text(1994.5, 2.24, "FDI Fitted Values", col="orange")
text(1997.5, .75, "Probability of PTA", col="black")
text(1994.3, .2, "PTA Incidence", col="red")
lines(1994:1998, prob[5:9]*2)

#AllCountries, Date and WTO Member Used as Instrument

vars.4 <- c("fdi_detrended", "lag_ln_pop", "lag_ln_gdp_pc_95d", "lag_gdp_gr", "date", "country", "lag_bits_cuml_restricted", "lag_polconiii_2002", "lag_polinstability_wvo", "lag_gattwto", "lag_pta_cuml", 
"fdi_inflow_unctad_gdp", "pta", "lag_gatt", "lag_wto", "ctylabel", "bits", "pta1")
fdi4 <- na.omit(fdi[,vars.4])
#WTO countries only
fdi4 <- subset(fdi4, fdi4$date>1989)

fdi4$treat <- ifelse(fdi4$lag_gattwto==1,1,0)
fdi4$treat <- ifelse(fdi4$date>1995,fdi4$treat,0)


#doing fuzzy discontinuity
se5 <- ivreg(fdi_inflow_unctad_gdp ~ pta  + bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr  + date +factor(country), instruments = ~ +treat  + bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr + date + factor(country), data=fdi4)
rd2 <- clus(se5, fdi4$country)

stage1 <- lm(pta ~ treat+ bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr + date + factor(country), data=fdi4)
fdi4$fit <- stage1$fitted.values
stage2 <- lm(fdi_inflow_unctad_gdp ~ fit + bits + lag_polconiii_2002 + lag_polinstability_wvo + lag_ln_pop + lag_ln_gdp_pc_95d + lag_gdp_gr  + date +factor(country), data=fdi4)

stage22$se <- rd1[,2]
stage2$se <- rd2[,2]
apsrtable(stage22, stage2, digits=4, lev=.1)